In [ ]:
# Silence every warning for the rest of the session; this must run before
# librosa is imported so that import-time warnings are suppressed as well.
import warnings
warnings.filterwarnings('ignore')
import librosa
# Path of the clip analysed throughout this notebook.
audio_path = 'test.wav'
# x is the decoded waveform, sr its sample rate; both are reused by later cells.
x , sr = librosa.load(audio_path)
import IPython.display as ipd
# Last expression of the cell: renders an inline audio player for the file.
ipd.Audio(audio_path)
Out[ ]:
Your browser does not support the audio element.
In [ ]:
%matplotlib inline
import sklearn
import matplotlib.pyplot as plt
import librosa.display

# Wide, short canvas so the whole clip fits on one time axis.
plt.figure(figsize=(20, 5))
# Draw the waveform loaded earlier (x, sr); the returned object is the cell output.
librosa.display.waveshow(x, sr=sr)
Out[ ]:
<librosa.display.AdaptiveWaveplot at 0x149ee6dd0>
In [ ]:
# Short-time Fourier transform of the waveform (complex-valued).
X = librosa.stft(x)
# Take the magnitude and convert it to decibels; Xdb is reused by the next cell.
Xdb = librosa.amplitude_to_db(abs(X))
plt.figure(figsize=(20, 5))
# Spectrogram with a linear frequency axis in Hz.
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
plt.colorbar()
Out[ ]:
<matplotlib.colorbar.Colorbar at 0x14a053850>
In [ ]:
# Same dB spectrogram, now on a logarithmic frequency axis.
# Create the figure explicitly so this plot has the same (20, 5) size as the
# linear-frequency spectrogram instead of falling back to the default size.
plt.figure(figsize=(20, 5))
librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
plt.colorbar()
Out[ ]:
<matplotlib.colorbar.Colorbar at 0x14a2db850>
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display

def analyze_prosody(audio_file):
    """Estimate pitch, tempo and an energy contour for an audio file and plot them.

    The function draws a three-panel figure (pitch-track matrix, waveform,
    normalised RMS energy) and shows it with ``plt.show()``.

    Parameters
    ----------
    audio_file : path-like or file object
        Anything accepted by ``librosa.load``.

    Returns
    -------
    pitch : float
        Mean of the ``librosa.piptrack`` frequencies whose magnitude exceeds
        90% of the largest magnitude in the clip. ``nan`` when no bin
        qualifies (e.g. an all-zero magnitude matrix).
    tempo : float or numpy.ndarray
        Tempo estimate returned by ``librosa.beat.beat_track``.
    rms_norm : numpy.ndarray
        Frame-wise RMS energy, min-max scaled to the 0-1 range. All zeros
        when the RMS is constant, so no division by zero occurs.
    """
    # One hop length shared by every frame-based feature, so the frame index
    # maps to the same time in all of them. 512 is what the librosa calls
    # below would use by default.
    hop_length = 512

    # Load audio file
    y, sr = librosa.load(audio_file)

    # Extract pitch candidates and their magnitudes per (frequency bin, frame)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr, hop_length=hop_length)

    # Average the pitch (in Hz) over the strongest bins only
    strong_bins = magnitudes > np.max(magnitudes) * 0.9
    if np.any(strong_bins):
        pitch = np.mean(pitches[strong_bins])
    else:
        # Nothing exceeds the threshold (all magnitudes are zero): avoid
        # taking the mean of an empty selection.
        pitch = np.nan

    # Extract tempo (pace)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

    # Calculate root mean square (RMS) energy, one value per frame
    rms = librosa.feature.rms(y=y, hop_length=hop_length)[0]

    # Normalize RMS to 0-1 range, guarding against a constant signal
    rms_min = np.min(rms)
    rms_span = np.max(rms) - rms_min
    if rms_span > 0:
        rms_norm = (rms - rms_min) / rms_span
    else:
        rms_norm = np.zeros_like(rms)

    # RMS values are per frame, not per sample: convert frame indices to
    # seconds using the hop length so this panel lines up with the waveform.
    rms_times = librosa.frames_to_time(
        np.arange(len(rms)), sr=sr, hop_length=hop_length
    )

    # Plotting
    plt.figure(figsize=(12, 8))

    # Plot pitch
    plt.subplot(3, 1, 1)
    librosa.display.specshow(
        pitches, sr=sr, hop_length=hop_length, x_axis='time', y_axis='linear'
    )
    plt.colorbar()
    plt.title('Pitch')

    # Plot waveform
    plt.subplot(3, 1, 2)
    plt.plot(np.arange(len(y)) / sr, y)
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.title('Waveform')

    # Plot RMS energy
    plt.subplot(3, 1, 3)
    plt.plot(rms_times, rms_norm)
    plt.xlabel('Time (s)')
    plt.ylabel('RMS Energy')
    plt.title('RMS Energy')

    plt.tight_layout()
    plt.show()

    return pitch, tempo, rms_norm

# Run the analysis on the sample clip and report the scalar results.
audio_file = 'test.wav'
pitch, tempo, rms_norm = analyze_prosody(audio_file)
# `!s` forces str() on the values, exactly as print() does with separate arguments.
print(f"Average pitch: {pitch!s} Hz")
print(f"Tempo (pace): {tempo!s} BPM")
Average pitch: 710.651 Hz
Tempo (pace): 143.5546875 BPM